# encoding=utf-8
import re
import requests
from lxml import etree
from scrapy_redis.spiders import RedisSpider
from Sina_spider2.weiboID import weiboID
from scrapy.selector import Selector
from scrapy.http import Request
from Sina_spider2.items import TopicItem
from Sina_spider2.util.numberConvert import *
import json


class Spider(RedisSpider):
    """Spider that collects Weibo topic listings from m.weibo.cn JSON endpoints.

    Every URL in ``start_urls`` is requested, and ``parse`` turns each entry
    of the first card's ``card_group`` in the JSON response into a
    ``TopicItem``.
    """

    name = "topicSpider"
    host = "http://m.weibo.cn"
    redis_key = "topicSpider:start_urls"
    # Topic list endpoints requested by start_requests().
    start_urls = ['http://m.weibo.cn/container/getIndex?containerid=100803',
                  'http://m.weibo.cn/container/getIndex?containerid=100803_-_page_hot_list',
                  'http://m.weibo.cn/container/getIndex?containerid=100803_ctg1_2_-_page_topics_ctg1__2',
                  'http://m.weibo.cn/container/getIndex?containerid=100803_ctg1_102_-_page_topics_ctg1__102',
                  'http://m.weibo.cn/container/getIndex?containerid=100803_ctg1_1_-_page_topics_ctg1__1',
                  ]
    # Disabled: additionally seeding one profile URL per ID in weiboID.
    #    for ID in weiboID:
    #        url = "http://weibo.cn/%s/profile?filter=1&page=1" % ID
    #        start_urls.append(url)

    def start_requests(self):
        """Yield one request per entry in ``start_urls``, handled by ``parse``.

        Requests are created with ``dont_filter=True`` so the scheduler's
        duplicate filter does not drop them.
        """
        for url in self.start_urls:
            print(url)
            yield Request(url=url, callback=self.parse, dont_filter=True)

    def parse(self, response):
        """Extract Weibo topic data from a JSON topic-list response.

        Reads ``cards[0]['card_group']`` from the decoded body and yields one
        ``TopicItem`` per entry with the fields ``homepage``, ``category``,
        ``title``, ``desc``, ``reply``, ``view`` and ``number`` (the 1-based
        position of the entry in the list).

        An entry that cannot be converted (missing key, unexpected ``desc2``
        layout, ``str2number`` failure) is reported on stdout and skipped.
        A body that is not JSON, or that has no ``cards[0]['card_group']``,
        raises and is left to Scrapy's error handling.
        """
        print(response.body)

        result = json.loads(response.body)
        cards = result['cards'][0]['card_group']

        for index, topic in enumerate(cards, 1):
            try:
                # A fresh item per topic: re-yielding one shared instance
                # lets later iterations overwrite items already handed to
                # the pipelines.
                item = TopicItem()
                item['homepage'] = topic['scheme']
                item['category'] = topic['category']
                item['title'] = topic['card_type_name']
                item['desc'] = topic['desc1']

                # json.loads returns text, so the markers must be text too.
                # Calling .encode() on a byte-string literal forces an
                # implicit ASCII decode on Python 2 and fails for these
                # non-ASCII words.
                desc2 = topic['desc2'].replace(u"讨论", u' ')
                desc2 = desc2.replace(u"阅读", u' ')

                # Whitespace split ignores the runs of blanks left behind by
                # the replacements; split(' ') would yield empty tokens.
                counts = desc2.split()

                item['reply'] = str2number(counts[0])
                item['view'] = str2number(counts[1])
                item['number'] = index
            except Exception as e:
                # Broad on purpose: str2number is an external helper whose
                # failure modes are not visible here. One bad entry must not
                # abort the rest of the page.
                print("skipping topic %d: %r" % (index, e))
                continue

            yield item

        print(cards)
